int violation = 0, i, j;
struct chwall_ssid *chwall_ssid;
ssidref_t chwall_ssidref;
- struct domain **pd;
+ struct domain *d;
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
/* go through all domains and adjust policy as if this domain was started now */
- pd = &domain_list;
- for (pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list)
+ for_each_domain ( d )
{
chwall_ssid =
GET_SSIDP(ACM_CHINESE_WALL_POLICY,
- (struct acm_ssid_domain *) (*pd)->ssid);
+ (struct acm_ssid_domain *)d->ssid);
chwall_ssidref = chwall_ssid->chwall_ssidref;
traceprintk("%s: validating policy for domain %x (chwall-REF=%x).\n",
- __func__, (*pd)->domain_id, chwall_ssidref);
+ __func__, d->domain_id, chwall_ssidref);
/* a) adjust types ref-count for running domains */
for (i = 0; i < chwall_buf->chwall_max_types; i++)
running_types[i] +=
}
}
out:
- write_unlock(&domlist_lock);
+ spin_unlock(&domlist_update_lock);
return violation;
/* returning "violation != 0" means that the currently running set of domains would
* not be possible if the new policy had been enforced before starting them; for chinese
int violation = 1;
struct ste_ssid *ste_ssid, *ste_rssid;
ssidref_t ste_ssidref, ste_rssidref;
- struct domain **pd, *rdom;
+ struct domain *d, *rdom;
domid_t rdomid;
struct grant_entry sha_copy;
int port, i;
- read_lock(&domlist_lock); /* go by domain? or directly by global? event/grant list */
+ rcu_read_lock(&domlist_read_lock);
+ /* go by domain? or directly by global? event/grant list */
/* go through all domains and adjust policy as if this domain was started now */
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+ for_each_domain ( d )
+ {
ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
- (struct acm_ssid_domain *)(*pd)->ssid);
+ (struct acm_ssid_domain *)d->ssid);
ste_ssidref = ste_ssid->ste_ssidref;
traceprintk("%s: validating policy for eventch domain %x (ste-Ref=%x).\n",
- __func__, (*pd)->domain_id, ste_ssidref);
+ __func__, d->domain_id, ste_ssidref);
/* a) check for event channel conflicts */
for (port=0; port < NR_EVTCHN_BUCKETS; port++) {
- spin_lock(&(*pd)->evtchn_lock);
- if ((*pd)->evtchn[port] == NULL) {
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_lock(&d->evtchn_lock);
+ if (d->evtchn[port] == NULL) {
+ spin_unlock(&d->evtchn_lock);
continue;
}
- if ((*pd)->evtchn[port]->state == ECS_INTERDOMAIN) {
- rdom = (*pd)->evtchn[port]->u.interdomain.remote_dom;
+ if (d->evtchn[port]->state == ECS_INTERDOMAIN) {
+ rdom = d->evtchn[port]->u.interdomain.remote_dom;
rdomid = rdom->domain_id;
/* rdom now has remote domain */
ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
(struct acm_ssid_domain *)(rdom->ssid));
ste_rssidref = ste_rssid->ste_ssidref;
- } else if ((*pd)->evtchn[port]->state == ECS_UNBOUND) {
- rdomid = (*pd)->evtchn[port]->u.unbound.remote_domid;
+ } else if (d->evtchn[port]->state == ECS_UNBOUND) {
+ rdomid = d->evtchn[port]->u.unbound.remote_domid;
if ((rdom = get_domain_by_id(rdomid)) == NULL) {
printk("%s: Error finding domain to id %x!\n", __func__, rdomid);
goto out;
ste_rssidref = ste_rssid->ste_ssidref;
put_domain(rdom);
} else {
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_unlock(&d->evtchn_lock);
continue; /* port unused */
}
- spin_unlock(&(*pd)->evtchn_lock);
+ spin_unlock(&d->evtchn_lock);
/* rdom now has remote domain */
ste_rssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
(struct acm_ssid_domain *)(rdom->ssid));
ste_rssidref = ste_rssid->ste_ssidref;
traceprintk("%s: eventch: domain %x (ssidref %x) --> domain %x (rssidref %x) used (port %x).\n",
- __func__, (*pd)->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);
+ __func__, d->domain_id, ste_ssidref, rdom->domain_id, ste_rssidref, port);
/* check whether on subj->ssid, obj->ssid share a common type*/
if (!have_common_type(ste_ssidref, ste_rssidref)) {
printkd("%s: Policy violation in event channel domain %x -> domain %x.\n",
- __func__, (*pd)->domain_id, rdomid);
+ __func__, d->domain_id, rdomid);
goto out;
}
}
/* b) check for grant table conflicts on shared pages */
- spin_lock(&(*pd)->grant_table->lock);
- for ( i = 0; i < nr_grant_entries((*pd)->grant_table); i++ ) {
+ spin_lock(&d->grant_table->lock);
+ for ( i = 0; i < nr_grant_entries(d->grant_table); i++ ) {
#define SPP (PAGE_SIZE / sizeof(struct grant_entry))
- sha_copy = (*pd)->grant_table->shared[i/SPP][i%SPP];
+ sha_copy = d->grant_table->shared[i/SPP][i%SPP];
if ( sha_copy.flags ) {
printkd("%s: grant dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%lx)\n",
- __func__, (*pd)->domain_id, i, sha_copy.flags, sha_copy.domid,
+ __func__, d->domain_id, i, sha_copy.flags, sha_copy.domid,
(unsigned long)sha_copy.frame);
rdomid = sha_copy.domid;
if ((rdom = get_domain_by_id(rdomid)) == NULL) {
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
printkd("%s: domain not found ERROR!\n", __func__);
goto out;
};
ste_rssidref = ste_rssid->ste_ssidref;
put_domain(rdom);
if (!have_common_type(ste_ssidref, ste_rssidref)) {
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
printkd("%s: Policy violation in grant table sharing domain %x -> domain %x.\n",
- __func__, (*pd)->domain_id, rdomid);
+ __func__, d->domain_id, rdomid);
goto out;
}
}
}
- spin_unlock(&(*pd)->grant_table->lock);
+ spin_unlock(&d->grant_table->lock);
}
violation = 0;
out:
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return violation;
/* returning "violation != 0" means that existing sharing between domains would not
* have been allowed if the new policy had been enforced before the sharing; for ste,
struct acm_ste_policy_buffer *ste_buf = (struct acm_ste_policy_buffer *)buf;
void *ssidrefsbuf;
struct ste_ssid *ste_ssid;
- struct domain **pd;
+ struct domain *d;
int i;
if (buf_size < sizeof(struct acm_ste_policy_buffer))
ste_bin_pol.ssidrefs = (domaintype_t *)ssidrefsbuf;
/* clear all ste caches */
- read_lock(&domlist_lock);
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain ( d ) {
ste_ssid = GET_SSIDP(ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY,
- (struct acm_ssid_domain *)(*pd)->ssid);
+ (struct acm_ssid_domain *)(d)->ssid);
for (i=0; i<ACM_TE_CACHE_SIZE; i++)
ste_ssid->ste_cache[i].valid = ACM_STE_free;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return ACM_OK;
error_free:
{
struct ste_ssid *ste_ssid;
int i;
- struct domain **pd;
+ struct domain *d;
struct acm_ssid_domain *ssid;
printkd("deleting cache for dom %x.\n", id);
- read_lock(&domlist_lock); /* look through caches of all domains */
- pd = &domain_list;
- for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list ) {
- ssid = (struct acm_ssid_domain *)((*pd)->ssid);
+ rcu_read_lock(&domlist_read_lock);
+ /* look through caches of all domains */
+ for_each_domain ( d ) {
+ ssid = (struct acm_ssid_domain *)(d->ssid);
if (ssid == NULL)
continue; /* hanging domain structure, no ssid any more ... */
ste_ssid->ste_cache[i].valid = ACM_STE_free;
}
out:
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
/***************************
/* this route is for dump routine */
unw_init_running(try_crashdump, pt);
} else {
+ rcu_read_lock(&domlist_read_lock);
for_each_domain(d) {
for_each_vcpu(d, v) {
printk("Backtrace of current vcpu "
show_stack(v, NULL);
}
}
+ rcu_read_unlock(&domlist_read_lock);
}
}
unw_init_running(freeze_cpu_osinit, NULL);
/*
 * note: some functions mask interrupts while holding this lock,
 * so this lock must never be acquired from an interrupt handler.
- * lock order domlist_lock => xenpfm_context_lock
*/
DEFINE_SPINLOCK(xenpfm_context_lock);
arg.error[cpu] = 0;
BUG_ON(in_irq());
- read_lock(&domlist_lock);
spin_lock(&xenpfm_context_lock);
error = xenpfm_start_stop_locked(0);
- read_unlock(&domlist_lock);
if (error) {
spin_unlock(&xenpfm_context_lock);
return error;
while (atomic_read(&arg.started) != cpus)
cpu_relax();
- for_each_domain(d) {
+ rcu_read_lock(&domlist_read_lock);
+ for_each_domain(d)
for_each_vcpu(d, v)
xenpfm_start_stop_vcpu(v, is_start);
- }
+ rcu_read_unlock(&domlist_read_lock);
arg.error[smp_processor_id()] = __xenpfm_start_stop(is_start);
atomic_inc(&arg.finished);
int error;
BUG_ON(in_irq());
- read_lock(&domlist_lock);
spin_lock(&xenpfm_context_lock);
- error =xenpfm_start_stop_locked(is_start);
+ error = xenpfm_start_stop_locked(is_start);
spin_unlock(&xenpfm_context_lock);
- read_unlock(&domlist_lock);
return error;
}
+/* Audit every domain in the system. The RCU domlist read lock keeps
+ * the domain list from being unlinked while we walk it. */
void audit_domains(void)
{
    struct domain *d;
+    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        audit_domain(d);
+    rcu_read_unlock(&domlist_read_lock);
}
void audit_domains_key(unsigned char key)
struct vcpu *v;
printk("*********** VMCB Areas **************\n");
+
+ rcu_read_lock(&domlist_read_lock);
+
for_each_domain ( d )
{
if ( !is_hvm_domain(d) )
}
}
+ rcu_read_unlock(&domlist_read_lock);
+
printk("**************************************\n");
}
struct vcpu *v;
printk("*********** VMCS Areas **************\n");
+
+ rcu_read_lock(&domlist_read_lock);
+
for_each_domain ( d )
{
if ( !is_hvm_domain(d) )
}
}
+ rcu_read_unlock(&domlist_read_lock);
+
printk("**************************************\n");
}
{
struct domain *d;
printk("'%c' pressed -> blowing all shadow tables\n", c);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain(d)
+ {
if ( shadow_mode_enabled(d) && d->vcpu[0] != NULL )
{
shadow_lock(d);
shadow_blow_tables(d);
shadow_unlock(d);
}
+ }
+ rcu_read_unlock(&domlist_read_lock);
}
/* Register this function in the Xen console keypress table */
wc_nsec = _wc_nsec = (u32)y;
spin_unlock(&wc_lock);
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
update_domain_wallclock_time(d);
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
static void local_time_calibration(void *unused)
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
+#include <xen/rcupdate.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
-/* Both these structures are protected by the domlist_lock. */
-DEFINE_RWLOCK(domlist_lock);
-struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+DEFINE_SPINLOCK(domlist_update_lock);
+DEFINE_RCU_READ_LOCK(domlist_read_lock);
+
+#define DOMAIN_HASH_SIZE 256
+#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
+static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;
struct domain *dom0;
if ( !is_idle_domain(d) )
{
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
pd = &domain_list; /* NB. domain_list maintained in order of domid. */
for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
if ( (*pd)->domain_id > d->domain_id )
break;
d->next_in_list = *pd;
- *pd = d;
d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
- domain_hash[DOMAIN_HASH(domid)] = d;
- write_unlock(&domlist_lock);
+    /* The two RCU assignments are not atomic, so readers may see an
+     * inconsistent domain list and hash table. That is OK as long as
+     * each RCU reader-side critical section uses only one of them. */
+ rcu_assign_pointer(*pd, d);
+ rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
+ spin_unlock(&domlist_update_lock);
}
return d;
{
struct domain *d;
- read_lock(&domlist_lock);
- d = domain_hash[DOMAIN_HASH(dom)];
+ rcu_read_lock(&domlist_read_lock);
+ d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
while ( d != NULL )
{
if ( d->domain_id == dom )
d = NULL;
break;
}
- d = d->next_in_hashbucket;
+ d = rcu_dereference(d->next_in_hashbucket);
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
return d;
}
send_guest_global_virq(dom0, VIRQ_DEBUGGER);
}
+/* Deferred tail of domain destruction, invoked as an RCU callback once
+ * no RCU reader can still reach @d through domain_list or domain_hash:
+ * tear down the domain's remaining resources, free the structure, and
+ * notify dom0 (VIRQ_DOM_EXC) that a domain has gone away. */
+static void complete_domain_destroy(struct rcu_head *head)
+{
+    struct domain *d = container_of(head, struct domain, rcu);
+
+    rangeset_domain_destroy(d);
+
+    evtchn_destroy(d);
+    grant_table_destroy(d);
+
+    arch_domain_destroy(d);
+
+    free_domain(d);
+
+    send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+}
/* Release resources belonging to task @p. */
void domain_destroy(struct domain *d)
return;
/* Delete from task list and task hashtable. */
- write_lock(&domlist_lock);
+ spin_lock(&domlist_update_lock);
pd = &domain_list;
while ( *pd != d )
pd = &(*pd)->next_in_list;
- *pd = d->next_in_list;
+ rcu_assign_pointer(*pd, d->next_in_list);
pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
while ( *pd != d )
pd = &(*pd)->next_in_hashbucket;
- *pd = d->next_in_hashbucket;
- write_unlock(&domlist_lock);
-
- rangeset_domain_destroy(d);
-
- evtchn_destroy(d);
- grant_table_destroy(d);
-
- arch_domain_destroy(d);
+ rcu_assign_pointer(*pd, d->next_in_hashbucket);
+ spin_unlock(&domlist_update_lock);
- free_domain(d);
-
- send_guest_global_virq(dom0, VIRQ_DOM_EXC);
+ /* schedule RCU asynchronous completion of domain destroy */
+ call_rcu(&d->rcu, complete_domain_destroy);
}
static void vcpu_pause_setup(struct vcpu *v)
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
+#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <asm/current.h>
cpumask_t cpu_exclude_map;
/* Do an initial CPU placement. Pick the least-populated CPU. */
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
for_each_vcpu ( d, v )
if ( !test_bit(_VCPUF_down, &v->vcpu_flags) )
cnt[v->processor]++;
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
/*
* If we're on a HT system, we only auto-allocate to a non-primary HT. We
if ( dom == DOMID_SELF )
dom = current->domain->domain_id;
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
if ( (d == NULL) || !get_domain(d) )
{
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
ret = -ESRCH;
break;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
getdomaininfo(d, &op->u.getdomaininfo);
printk("'%c' pressed -> dumping domain info (now=0x%X:%08X)\n", key,
(u32)(now>>32), (u32)now);
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
}
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
}
static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
loop = 0;
printk("\nnot on Q\n");
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
for_each_vcpu(d, ed)
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
}
struct domain *d;
int sumw[NR_CPUS] = { 0 };
s_time_t sumt[NR_CPUS] = { 0 };
-
+
/* Sum across all weights. */
+ rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
for_each_vcpu( d, p )
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
/* Adjust all slices (and periods) to the new weight. */
+ rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
for_each_vcpu ( d, p )
}
}
}
+ rcu_read_unlock(&domlist_read_lock);
return 0;
}
struct xen_domctl_getdomaininfo info;
u32 num_domains = 0;
- read_lock(&domlist_lock);
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
{
num_domains++;
}
- read_unlock(&domlist_lock);
+ rcu_read_unlock(&domlist_read_lock);
if ( ret != 0 )
break;
int rcu_pending(int cpu);
int rcu_needs_cpu(int cpu);
+/*
+ * Dummy lock type for passing to rcu_read_{lock,unlock}. Currently exists
+ * only to document the reason for rcu_read_lock() critical sections.
+ */
+struct _rcu_read_lock {};
+typedef struct _rcu_read_lock rcu_read_lock_t;
+#define DEFINE_RCU_READ_LOCK(x) rcu_read_lock_t x
+
+/**
+ * rcu_read_lock - mark the beginning of an RCU read-side critical section.
+ *
+ * When call_rcu() is invoked on one CPU while other CPUs are within
+ * RCU read-side critical sections, invocation of the corresponding RCU
+ * callback is deferred until after all the other CPUs exit their
+ * critical sections.
+ *
+ * Note, however, that RCU callbacks are permitted to run concurrently
+ * with RCU read-side critical sections. One way that this can happen
+ * is via the following sequence of events: (1) CPU 0 enters an RCU
+ * read-side critical section, (2) CPU 1 invokes call_rcu() to register
+ * an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
+ * (4) CPU 2 enters a RCU read-side critical section, (5) the RCU
+ * callback is invoked. This is legal, because the RCU read-side critical
+ * section that was running concurrently with the call_rcu() (and which
+ * therefore might be referencing something that the corresponding RCU
+ * callback would free up) has completed before the corresponding
+ * RCU callback is invoked.
+ *
+ * RCU read-side critical sections may be nested. Any deferred actions
+ * will be deferred until the outermost RCU read-side critical section
+ * completes.
+ *
+ * It is illegal to block while in an RCU read-side critical section.
+ */
+#define rcu_read_lock(x) do { } while (0)
+
+/**
+ * rcu_read_unlock - marks the end of an RCU read-side critical section.
+ *
+ * See rcu_read_lock() for more information.
+ */
+#define rcu_read_unlock(x) do { } while (0)
+
+/*
+ * So where is rcu_write_lock()? It does not exist, as there is no
+ * way for writers to lock out RCU readers. This is a feature, not
+ * a bug -- this property is what provides RCU's performance benefits.
+ * Of course, writers must coordinate with each other. The normal
+ * spinlock primitives work well for this, but any other technique may be
+ * used as well. RCU does not care how the writers keep out of each
+ * others' way, as long as they do so.
+ */
+
/**
* rcu_dereference - fetch an RCU-protected pointer in an
* RCU read-side critical section. This pointer may later
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
+#include <xen/rcupdate.h>
#include <xen/irq.h>
#ifdef CONFIG_COMPAT
#endif
extern unsigned long volatile jiffies;
-extern rwlock_t domlist_lock;
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;
/* OProfile support. */
struct xenoprof *xenoprof;
int32_t time_offset_seconds;
+
+ struct rcu_head rcu;
};
struct domain_setup_info
local_events_need_delivery() \
))
-/* This domain_hash and domain_list are protected by the domlist_lock. */
-#define DOMAIN_HASH_SIZE 256
-#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
-extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
+/* Protect updates/reads (resp.) of domain_list and domain_hash. */
+extern spinlock_t domlist_update_lock;
+extern rcu_read_lock_t domlist_read_lock;
+
extern struct domain *domain_list;
+/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d) \
- for ( (_d) = domain_list; \
+ for ( (_d) = rcu_dereference(domain_list); \
(_d) != NULL; \
- (_d) = (_d)->next_in_list )
+ (_d) = rcu_dereference((_d)->next_in_list) )
#define for_each_vcpu(_d,_v) \
for ( (_v) = (_d)->vcpu[0]; \